from lxml import etree

# Sample document used by the XPath demonstrations below.
# NOTE: every attribute value must be closed with a matching quote.  An
# unterminated value such as  id = "li111>  runs on to the next quote
# character and absorbs the following `<a href=` markup, so the anchors
# inside the list items would never show up as elements in the tree.
html_str = """
<html>
<body>
<h1 id="title">This is a Heading</h1>
<p class="intro">This is a paragraph.</p>

<ol>
    <li id = "li111"><a href="https://www.baidu.com">百度1</a></li>
    <li id = "li111"><a href="https://www.google.com">谷歌2</a></li>
    <li id = "li111"><a href="https://www.google1.com">谷歌3</a></li>
</ol>

<ol>
    <li id = "li111"><a href="https://www.baidu.com">百度111</a></li>
    <li id = "li111"><a href="https://www.google.com">谷歌222</a></li>
    <li id = "li111"><a href="https://www.google1.com">谷歌333</a></li>
</ol>

<ul>
    <li id = "li111"><a href="https://www.baidu.com">百度11</a></li>
    <li id = "li112"><a href="https://www.google.com">谷歌22</a></li>
    <li id = "li113"><a href="https://www.google1.com">谷歌33</a></li>
</ul>
<h1 class = "ss"><a href="https://www.baidu.com">百度11</a></h1>

</body>
</html>

"""
tree = etree.HTML(html_str)

# --- Relative queries, starting from the first <ul> in the document ---
ul = tree.xpath('//ul')[0]
print(ul)
lis = ul.xpath('./li')
for li in lis:
    # The visible text belongs to the nested <a>, so `li.text` is None;
    # string(.) concatenates the text of the element and its descendants.
    print(li.xpath('string(.)'))

# `..` steps up to the parent of <ul>; from there select the sibling <ol>s.
ol2 = ul.xpath('../ol')
for ol in ol2:
    lis = ol.xpath('./li')
    for li in lis:
        print(li.xpath('string(.)'))

# Same list items, selected with an absolute path from the document root.
lis = tree.xpath('//ol/li')
for li in lis:
    print(li.xpath('string(.)'))

print("============")

# `.//a` searches all descendants of the first <ol> only.
a_s = ol2[0].xpath('.//a')
print(a_s)

for a in a_s:
    # Element API: `.text` is the leading text, `.attrib` the attribute map.
    print(a.text, a.attrib.get('href'))

# XPath API: text() and @href each return a list of matching strings.
a_s = tree.xpath('//a')
for a in a_s:
    print(a.xpath('./text()'), a.xpath('./@href'))
print("===============================")

# Text and href of anchors that are direct children of an <h1>.
print(tree.xpath('//h1/a/text()'), tree.xpath('//h1/a/@href'))







# tree = etree.HTML(html_str)
#
# lis1 = tree.xpath('//ol/li/a')
# for li in lis1:
#     print(li.text, li.attrib, li.tag)
# lis2  = tree.xpath('//*[@href]')
# for li in lis2:
#     print(li.text, li.attrib, li.tag)
# lis3 = tree.xpath('//*[@href = "https://www.baidu.com"]')
# print("===============================")
# for li in lis3:
#     print(li.text, li.attrib, li.tag)
#
# lis4 = tree.xpath('//li[1]|//li[last()]|//li[last()-1]')
# for li in lis4:
#     print(li.text, li.attrib, li.tag)
#
# lis5 = tree.xpath('//li[position()>1]')
# for li in lis5:
#     print(li.attrib)
#
# print("===============================")
#
# a_s_text = tree.xpath("//h1[@class]/a/text()")
# print(a_s_text)
#
#
# lis = tree.xpath('//li[position()<2 or position()>2]')
#
# print(lis)
# print("===============================")
# a = tree.xpath('//a[contains(text(), "百")]')
# a1 = tree.xpath('//a[contains(@href, "baidu")]')
#
# a2 = tree.xpath('//a[starts-with(text(), "百")]')
# a3 = tree.xpath('//a[starts-with(@href, "http")]')
# print(a, a1, a2, a3)


# tree = etree.HTML(html_str)
# print(dir(tree))
# print(type( tree))
#
# title0 = tree.cssselect('#title')
# for title in title0:
#     print(title.text, title.tag,title.attrib)
# ino = tree.cssselect('.intro')
# for ino0 in ino:
#     print(ino0.text)
#
# lis = tree.cssselect('li a')
# for li in lis:
#     print(li.text)
# lis1 = tree.cssselect('ol li a')
# for li in lis1:
#     print(li.text)
#     a = li.cssselect('a')[0]
#     print(a.text, a.attrib, a.tag)
#



